import pandas
import os
import matplotlib.pyplot as plt
import numpy as np


# Load the raw CSV tables from the problem data directory.
baseDir = './2025_Problem_C_Data'
hosts = pandas.read_csv(
    os.path.join(baseDir, 'summerOly_hosts.csv'),
    encoding='utf-8',
    low_memory=False,
)
medals = pandas.read_csv(
    os.path.join(baseDir, 'summerOly_medal_counts.csv'),
    encoding='utf-8',
    low_memory=False,
)

# Reduce the 'Host' column to a bare country name: keep the text after the
# first comma, cut anything from the first '(' onwards, trim surrounding
# whitespace, then rename 'United Kingdom' to 'Great Britain' (the loop
# further down compares this value against medals['NOC']).
host_country = (
    hosts['Host']
    .str.split(',').str[1]
    .str.split('(').str[0]
    .str.strip()
    .replace('United Kingdom', 'Great Britain')
)
hosts['Host'] = host_country

# Keep only the games held from 2000 through 2024 (inclusive).
in_window = (hosts['Year'] >= 2000) & (hosts['Year'] <= 2024)
hosts = hosts[in_window]

# For every host country, print its average medal-table rank over 2000-2024
# next to the rank it achieved in the year it hosted the games.

# The year window is the same for every host, so filter the medal table once
# instead of on each loop iteration.
recent_medals = medals[(medals['Year'] >= 2000) & (medals['Year'] <= 2024)]

for _, row in hosts.iterrows():
    # All 2000-2024 medal-table rows belonging to this host country.
    records = recent_medals[recent_medals['NOC'] == row['Host']]

    # round(NaN, 2) simply yields NaN, so an empty selection prints 'nan'.
    avg_rank = round(records['Rank'].mean(), 2)

    # round(NaN) without ndigits raises ValueError, so guard the case where
    # the host has no medal-table row for its hosting year.
    host_year_mean = records.loc[records['Year'] == row['Year'], 'Rank'].mean()
    host_rank = 'N/A' if pandas.isna(host_year_mean) else round(host_year_mean)

    print(f'{row["Host"]}\t 平均排名，: {avg_rank}, {row["Year"]} 年排名: {host_rank}')




